Issues

  1. with the exception of landmark 1, the shape of fish from pools 1 and 2 are very similar and the shape of fish from pools 3 and 4 are very similar. This would mean the brood effects are much stronger than treatment effects. But…
  2. Why the polar opposite with landmark 1? Is it a data management bug? Hard to think how this could happen. Or is this the only landmark where treatment effect >> brood effect? The reason this raises a flag is because the pattern is so clean in x1 vs x2 (and some other lm) and doesn’t seem to be because of noise.

For overall picture look at the four plots under header PC plots

For detailed picture look at the plots under header Univariate fits and ANCOVA fits, especially compare x1 (left panel of first plot in each section) and x2 (left panel of second plot in each section)

1 Setup

geom_ancova_grouped <- function(m1, group_by = "pred"){
  geom_smooth(method = "lm",
              mapping = aes(y = predict(m1),
                            linetype = get(group_by))
  )
}

2 Import and wrangle

2.1 Import and wrangle raw coordinates

  1. Import raw coordinate values
data_folder <- "data/ghalambor"
file_name <- "allff.raw.txt"
file_path <- here(data_folder, file_name)
raw_lm <- fread(file_path) %>%
  clean_names()
raw_lm[, caudal_fin := sqrt((c_fx-x6)^2 + (cfy-y6)^2)]
raw_lm[, caudal_fin2 := c_fx-x6]
# temp <- raw_lm[dataset == "greenhouse",
#                .SD,
#                .SDcols = c("id", "gen", "treatment", "caudal_fin", "caudal_fin2")]
  1. what do raw coords look like?
raw_long <- melt(raw_lm,
              id.vars = c("id", "dataset"),
              measure.vars = list(paste0("x", 1:12),
                                  paste0("y", 1:12)),
              variable.name = c("landmark"),
              value.name = c("x", "y")
              )
qplot(x = x, y = y, data = raw_long) +
  coord_fixed()

  1. Superimpose using TPR to get into standard orientation
axis_levels <- c("x", "y")
lm_levels <- 1:12
lm_cols <- do.call(paste0, expand.grid(axis_levels, lm_levels))
lm_cols_x13 <- c(lm_cols, "x13")
lm_cols_13 <- c(lm_cols_x13, "y13")

lm_mat <- raw_lm[, .SD, .SDcols = lm_cols] %>%
  as.matrix()
lm_array <- matrix_2_array(lm_mat)
plot_shapes(lm_array)

lm_tpr <- tpr(lm_array, 1, 6)
plot_shapes(lm_tpr)

  1. Superimpose using GRF
lm_grf <- grf(lm_tpr)
plot_shapes(lm_grf)

  1. Merge imported and grf fit data
lm_grf_dt <- array_2_matrix(lm_grf) %>%
  data.table
colnames(lm_grf_dt) <- lm_cols
keep_cols <- setdiff(names(raw_lm), lm_cols)
shape <- cbind(raw_lm[, .SD, .SDcols = keep_cols],
                lm_grf_dt)
setnames(shape, "treatment", "drainage_treatment")
  1. Add size
centroid_size <- function(coords){
  cols <- 1:length(coords)
  even_col <- cols[lapply(cols, "%%", 2) == 0]
  odd_col <- cols[lapply(cols, "%%", 2) != 0]
  x <- coords[even_col]
  y <- coords[odd_col]
  xbar <- mean(x)
  ybar <- mean(y)
  cs <- sqrt(sum((x-xbar)^2 + (y-ybar)^2))
  return(cs)
}

cs <- apply(lm_mat, 1, centroid_size)
shape[, centroid_size := cs]

n <- dim(lm_array)[3]
for(i in 1:n){
  fig_i <- lm_array[,,i]
  shape[i, median_size := median_size(fig_i)]
}
shape[, x13 := mean(x6) + caudal_fin/median_size]
  1. Merge drainage data
data_folder <- "data/ghalambor"
file_name <- "stream to drainage map.txt"
file_path <- here(data_folder, file_name)
drainage_map <- fread(file_path) %>%
  clean_names()

shape1 <- merge(shape, drainage_map,
                by = "stream",
                sort = FALSE)
shape <- shape1
  1. create stream5 col
shape[, stream5 := substr(stream_id, 1, 5) %>% tolower()]
shape[, site := paste(stream5, tolower(pred_state), sep = "-")]

2.2 Reshape shape data

  1. Reshape the shape data wide to long, stacking the x coordinates into a single x column and y coordinates into a single y column.
# names(shape_wide)
shape_long <- melt(shape,
              id.vars = c("id", "dataset", "stream_id",
                          "stream", "pred_state", "gen",
                          "drainage_treatment"),
              measure.vars = list(paste0("x", 1:13),
                                  paste0("y", 1:12)),
              variable.name = c("landmark"),
              value.name = c("x", "y")
              )
  1. A simple view of the coordinates to see what we have.
# unique(shape_wide$landmark)
qplot(x = x, y = y, data = shape_long) +
  coord_fixed()

  1. Create a column with the corrected treatment level
# unique(shape$dataset)
shape[dataset == "wild", treatment := "Wild"]
shape[dataset == "garden", treatment := "No Flow"]
shape[dataset == "greenhouse" &
        drainage_treatment == "pike", treatment := "Flow + pike"]
shape[dataset == "greenhouse" &
        drainage_treatment == "no_pike", treatment := "Flow"]
  1. Change quII to quar. In the garden dataset, stream_id includes “quII”. In the greenhouse dataset, stream_id includes “quar”. Are these the same?
unique(shape$stream_id)
##  [1] "arimaRi"       "AripoIn"       "AripoDC"       "ElCedroMain"  
##  [5] "ElCedroDavid"  "ElCedroIn"     "GuanapoMain"   "JordanTrib"   
##  [9] "Marianne"      "MarianneTrib"  "Mauzica"       "Oropuche"     
## [13] "Paria"         "Quare6"        "Quare6_2"      "QuareII"      
## [17] "TurareHigh"    "TurareMed"     "turaretriblow" "turaremedpred"
## [21] "yaratrib"      "ardc"          "arin"          "guan"         
## [25] "orop"          "quII"          "turu"          "quar"
shape[stream_id == "quII", stream_id := "quar"]

3 Create lab-reared subset

f2 <- shape[gen != "f0"]

# recode treatment
recode <- FALSE
if(recode == TRUE){
  f2[, treatment_old := treatment]
  f2[gen == "p03", treatment := "Flow + pike"]
  f2[gen == "p01", treatment := "Flow"]
}

flow_levels <- c("No Flow", "Flow")
pred_levels <- c("Flow", "Flow + pike")
treatment_levels <- union(flow_levels, pred_levels)
treatment_tank_levels <- c("No Flow f2", "Flow p02", "Flow p03",
                           "Flow + pike p01", "Flow + pike p04")
# recoded
if(recode == TRUE){
  treatment_tank_levels <- c("No Flow f2", "Flow p01", "Flow p02", "Flow + pike p03", "Flow + pike p04")
}

f2[, treatment_tank := factor(paste(treatment, gen),
   levels = treatment_tank_levels)]
f2[, treatment := factor(treatment,
                              levels = treatment_levels)]

stream_id_levels <- c("ardc", "arin", "orop", "quar", "turu", "guan")
pred_state_levels <- c("High", "Med", "Low", "LowIntro" )
f2[, stream_id := factor(stream_id, levels = stream_id_levels)]
f2[, pred_state := factor(pred_state, levels = pred_state_levels)]

# create sample_name
f2[, sample_name := paste("fish", .I)]
# f2[, sample_name := .I]

# remove ".txt" from id
f2[, id := str_replace_all(id, ".txt", "")]

3.1 Merge brood ID

The brood file column locale is inconsistent with other files. Which is correct?

data_folder <- "data/ghalambor/rebroodvtreatment"
file_name <- "Greenhouse_Female_Data.xlsx"
file_path <- here(data_folder, file_name)

brood <- read_excel(file_path) %>%
  data.table() %>%
  clean_names()

# clean mark column
brood[, mark := str_replace_all(mark, " ", "")]
brood[, mark := str_replace_all(mark, ",", "_")]
brood[, mark := str_replace_all(mark, "&", "_")]
brood[, mark := str_replace_all(mark, "LLFblack", "blackLLF")]
brood[, mark := str_replace_all(mark, "and", "_")]

# what is yellowRUF_blackbelow"
# what is "purple_blackRUF" -> "purpleRUF_blackRUF"
# what is "yellowFUF" -> "yellowRUF"
# what is "greenRUF_blackLLB" -> "greenRUF_blackLLF"
brood[mark == "purple_blackRUF", mark := "purpleRUF_blackRUF"]
brood[mark == "yellowFUF", mark := "yellowRUF"]
brood[mark == "greenRUF_blackLLB", mark := "greenRUF_blackLLF"]

# Pop ID    Name    Locale
# 1 ardc    3
# 2 arin    4
# 3 guan    6
# 4 orop    1
# 5 quii    2
# 6 turu    5

site_list <- c("orop", "quar", "ardc", "arin", "turu", "guan")
brood[, site := site_list[locale]]
brood[, number := ifelse(id_number < 10,
                         paste0("0", id_number),
                         as.character(id_number))]
brood[, pool_id := paste0("p0", pool)]
brood[, id := paste0(site, number, pool_id)]

y_cols <- c("id", "mark")
f2 <- merge(f2, brood[, .SD, .SDcols = y_cols],
                 by = "id",
                 all.x = TRUE)
f2[, mark_all := ifelse(is.na(mark), "none", mark)]
f2[, family := tstrsplit(mark_all, "_", keep = 1)]
# f2_temp <- merge(f2, brood[, .SD, .SDcols = y_cols],
#                  by = "id",
#                  all = TRUE)
# y_cols <- c("id", "mark", "x1")
# View(f2_temp[dataset == "greenhouse", .SD, .SDcols = y_cols])
f2_marks <- f2[dataset == "greenhouse", .(N = .N),
               by = .(stream_id, family, treatment_tank)]
f2_marks_wide <- dcast(f2_marks,
                       stream_id + family ~ treatment_tank,
                       value.var = "N")

4 Multivariate (PCA) Exploration of Treatment

PCA is not a multi-group method (at least without some modifications) so this is suboptimal but PCA should still capture most of among-group variation as long as the ratio of among:within is high.

4.1 size free

Y <- f2[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
row.names(Y) = f2$sample_name
S <- cov(Y)
E <- eigen(S)$vectors
f2[, pc1 := (Y %*% E[,1])[,1]]
f2[, pc2 := (Y %*% E[,2])[,1]]
f2[, pc3 := (Y %*% E[,3])[,1]]

y_cols <- c("pc1", "pc2", "pc3", lm_cols_x13)
pc_loadings <- cor(f2[, .SD, .SDcols = y_cols])[c(-1,-2,-3), 1:3]
pc_loadings %>%
  kable(digits = 2) %>%
  kable_styling()
pc1 pc2 pc3
x1 0.48 -0.03 0.11
y1 -0.03 -0.10 -0.07
x2 -0.79 -0.38 -0.28
y2 -0.12 -0.10 0.34
x3 0.45 0.22 0.29
y3 0.23 0.04 0.68
x4 -0.81 -0.32 0.26
y4 0.32 0.13 0.51
x5 0.39 0.42 -0.33
y5 -0.48 -0.14 -0.20
x6 0.44 -0.16 -0.56
y6 -0.56 -0.17 -0.48
x7 0.12 0.15 -0.74
y7 -0.17 -0.04 -0.50
x8 -0.36 -0.35 0.31
y8 0.04 0.15 0.05
x9 0.39 0.05 0.44
y9 0.34 0.23 0.07
x10 -0.03 0.06 0.31
y10 -0.04 0.29 -0.18
x11 -0.06 0.12 0.09
y11 0.39 -0.11 -0.07
x12 0.10 -0.28 -0.10
y12 -0.33 -0.27 -0.09
x13 -0.86 0.50 0.00
gg1 <- ggplot(data = f2,
       aes(x = pc1,
           y = pc2,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC1 v PC2: size free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
  
  NULL

gg2 <- ggplot(data = f2,
       aes(x = pc2,
           y = pc3,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC2 v PC3: size free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +

  NULL

plot_grid(gg1, gg2, nrow = 2)

scatter3d(pc3 ~ pc1 + pc2 | treatment_tank,
          data = f2,
          surface = FALSE)
# unstandardized
dist_mat <- dist(Y, method = 'euclidean')
hclust_avg <- hclust(dist_mat, method = 'average')
# plot(hclust_avg)

# extract dendrogram segment data
dendrogram_data <- dendro_data(hclust_avg)
dendrogram_segments <- dendrogram_data$segments # contains all dendrogram segment data

# get terminal dendrogram segments
dendrogram_ends <- dendrogram_segments %>%
 filter(yend == 0) %>% # filter for terminal dendrogram ends
 left_join(dendrogram_data$labels, by = "x") %>% # .$labels contains the row names from dist_matrix (i.e., sample_name)
 rename(sample_name = label) %>%
 left_join(f2, by = "sample_name") 
# dataframe now contains only terminal dendrogram segments and merged metadata associated with each iris

gg_tree <- ggplot() +
  geom_segment(data = dendrogram_segments,
               aes(x = x,
                   y = y,
                   xend = xend,
                   yend = yend)) +
  geom_segment(data = dendrogram_ends,
               aes(x = x,
                   y = y.x,
                   xend = xend,
                   yend = yend,
                   # text = paste("sample name: ",
                   #              sample_name, "<br>",
                   #              "species: ", Species),
                   color = treatment_tank)) +
  # test aes is for plotly
  scale_color_manual(values = pal_okabe_ito[1:5]) +
  scale_y_reverse() +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none") +
  ylab("Distance") + # flipped x and y coordinates for aesthetic reasons
  ggtitle("Treatment clusters -- size free")

gg_tree

4.2 Allometry free

Y_raw <- f2[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
m1 <- lm(Y ~ median_size, data = f2)
Y <- residuals(m1)
row.names(Y) = f2$sample_name
S <- cov(Y)
E <- eigen(S)$vectors
f2[, pc1_allom_free := (Y %*% E[,1])[,1]]
f2[, pc2_allom_free := (Y %*% E[,2])[,1]]
f2[, pc3_allom_free := (Y %*% E[,3])[,1]]

y_cols <- c("pc1_allom_free", "pc2_allom_free", "pc3_allom_free", lm_cols_x13)
pc_AF_loadings <- cor(f2[, .SD, .SDcols = y_cols])[c(-1,-2,-3), 1:3]

pc_both_loadings <- cbind(pc_loadings, pc_AF_loadings)
pc_both_loadings %>%
  kable(digits = 2) %>%
  kable_styling()
pc1 pc2 pc3 pc1_allom_free pc2_allom_free pc3_allom_free
x1 0.48 -0.03 0.11 0.18 -0.10 -0.32
y1 -0.03 -0.10 -0.07 -0.07 0.07 -0.31
x2 -0.79 -0.38 -0.28 -0.68 0.37 -0.31
y2 -0.12 -0.10 0.34 -0.28 -0.07 -0.07
x3 0.45 0.22 0.29 0.40 -0.24 0.24
y3 0.23 0.04 0.68 -0.14 -0.29 0.21
x4 -0.81 -0.32 0.26 -0.72 0.26 0.42
y4 0.32 0.13 0.51 -0.02 -0.34 0.01
x5 0.39 0.42 -0.33 0.53 -0.23 0.05
y5 -0.48 -0.14 -0.20 -0.47 0.08 -0.48
x6 0.44 -0.16 -0.56 0.50 0.36 -0.14
y6 -0.56 -0.17 -0.48 -0.36 0.25 -0.42
x7 0.12 0.15 -0.74 0.28 0.06 -0.39
y7 -0.17 -0.04 -0.50 0.05 0.19 -0.31
x8 -0.36 -0.35 0.31 -0.42 0.25 0.24
y8 0.04 0.15 0.05 0.16 -0.05 0.50
x9 0.39 0.05 0.44 0.15 -0.19 0.15
y9 0.34 0.23 0.07 0.40 -0.12 0.51
x10 -0.03 0.06 0.31 -0.06 -0.10 0.41
y10 -0.04 0.29 -0.18 0.19 -0.13 0.31
x11 -0.06 0.12 0.09 -0.01 -0.13 0.07
y11 0.39 -0.11 -0.07 0.26 0.13 -0.03
x12 0.10 -0.28 -0.10 -0.08 0.20 -0.39
y12 -0.33 -0.27 -0.09 -0.34 0.23 -0.24
x13 -0.86 0.50 0.00 -0.61 -0.52 -0.10
cor(pc_both_loadings)[4:6, 1:3] %>%
  kable(digits = 2) %>%
  kable_styling()
pc1 pc2 pc3
pc1_allom_free 0.90 0.49 -0.18
pc2_allom_free -0.30 -0.85 -0.50
pc3_allom_free 0.19 0.34 0.57
gg1 <- ggplot(data = f2,
       aes(x = pc1_allom_free,
           y = pc2_allom_free,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC1 v PC2: allometry free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
  
  NULL

gg2 <- ggplot(data = f2,
       aes(x = pc2,
           y = pc3,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC2 v PC3: allometry free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +

  NULL

gg1

gg2

#plot_grid(gg1, gg2, nrow = 2)
# unstandardized
dist_mat <- dist(Y, method = 'euclidean')
hclust_avg <- hclust(dist_mat, method = 'average')
# plot(hclust_avg)

# extract dendrogram segment data
dendrogram_data <- dendro_data(hclust_avg)
dendrogram_segments <- dendrogram_data$segments # contains all dendrogram segment data

# get terminal dendrogram segments
dendrogram_ends <- dendrogram_segments %>%
 filter(yend == 0) %>% # filter for terminal dendrogram ends
 left_join(dendrogram_data$labels, by = "x") %>% # .$labels contains the row names from dist_matrix (i.e., sample_name)
 rename(sample_name = label) %>%
 left_join(f2, by = "sample_name") 
# dataframe now contains only terminal dendrogram segments and merged metadata associated with each iris

gg_tree <- ggplot() +
  geom_segment(data = dendrogram_segments,
               aes(x = x,
                   y = y,
                   xend = xend,
                   yend = yend)) +
  geom_segment(data = dendrogram_ends,
               aes(x = x,
                   y = y.x,
                   xend = xend,
                   yend = yend,
                   # text = paste("sample name: ",
                   #              sample_name, "<br>",
                   #              "species: ", Species),
                   color = treatment_tank)) +
  # test aes is for plotly
  scale_color_manual(values = pal_okabe_ito[1:5]) +
  scale_y_reverse() +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none") +
  ylab("Distance") + # flipped x and y coordinates for aesthetic reasons
  ggtitle("Treatment clusters -- unstandardized")

gg_tree

Notes

  1. Distinct treatment groups not very conspicuous. Is this largely size + pc1?

4.3 PC plots

gg1 <- ggplot(data = f2,
       aes(x = median_size,
           y = pc1,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC1 v median size: size free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
  
  NULL

gg2 <- ggplot(data = f2,
       aes(x = median_size,
           y = pc1_allom_free,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC1 v median size: allometry free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
  
  NULL


gg1

gg2

pd <- position_dodge(0.4)
gg <- ggplot(data = f2,
       aes(x = treatment_tank,
           y = pc1,
                 color = stream_id)) +
  geom_point(position = pd) +
  theme_pubr() +
  scale_color_manual(values = pal_okabe_ito) +
  ggtitle("Size-free PC1 by treatment") +
  
  NULL
gg

pd <- position_dodge(0.4)
gg <- ggplot(data = f2,
       aes(x = treatment_tank,
           y = pc1_allom_free,
                 color = stream_id)) +
  geom_point(position = pd) +
  theme_pubr() +
  scale_color_manual(values = pal_okabe_ito) +
  ggtitle("Allometry-free PC1 by treatment") +
  
  NULL
gg

pd <- position_dodge(0.4)
gg <- ggplot(data = f2,
       aes(x = treatment_tank,
           y = pc2,
                 color = stream_id)) +
  geom_point(position = pd) +
  theme_pubr() +
  scale_color_manual(values = pal_okabe_ito) +
  ggtitle("Size-free PC1 by treatment") +
  
  NULL
gg

pd <- position_dodge(0.4)
gg <- ggplot(data = f2,
       aes(x = treatment_tank,
           y = pc2_allom_free,
                 color = stream_id)) +
  geom_point(position = pd) +
  theme_pubr() +
  scale_color_manual(values = pal_okabe_ito) +
  ggtitle("Allometry-free PC1 by treatment") +
  
  NULL
gg

5 Univariate effects of Treatment

5.1 Univariate fits

  1. Model fits to each coordinate

5.1.1 by stream_id

# supress message "NOTE: A nesting structure was detected in the fitted model:
# treatment_tank %in% treatment"
m1_lm <- list()
m1_lm_emm <- list()
for(j in 1:length(lm_cols_x13)){
  # model fit
  form_j <- paste0(lm_cols_x13[j],
                   " ~ stream_id * treatment_tank") %>%
    formula()
  m1 <- lm(form_j, data = f2)


  # inference

  m1_emm <- emmeans(m1,
                    specs = c("stream_id", "treatment_tank"))
  
  # save to list
  m1_lm[[length(m1_lm)+1]] <- m1
  m1_lm_emm[[length(m1_lm_emm)+1]] <- summary(m1_emm)
}
names(m1_lm) <- lm_cols_x13
names(m1_lm_emm) <- lm_cols_x13
m1_gg <- list()
p <- length(lm_cols_x13)
pd <- position_dodge(0.4)
for(j in 1:p){
  m1 <- m1_lm[[j]]
  m1_emm <- m1_lm_emm[[j]]
  gg <- ggplot(data = f2,
               aes(x = treatment_tank,
                   y = get(lm_cols_x13[j]),
                   color = stream_id)) +
    
    # individual points
    geom_point(position = pd, alpha = 0.5) +
    
    # group means
    geom_point(data = m1_emm,
               aes(x = treatment_tank,
                   y = emmean,
                   color = stream_id),
               size = 3,
               position = pd) +
    

    ylab(lm_cols_x13[j]) +
    theme_pubr() +
#    scale_color_manual(values = pal_okabe_ito_blue) +
    scale_x_discrete(labels = label_wrap(10))
    NULL
  
  m1_gg[[length(m1_gg)+1]] <- ggplotGrob(gg)
}
names(m1_gg) <- lm_cols_x13

plot_grid(m1_gg[[1]],
          m1_gg[[2]], nrow = 1)

plot_grid(m1_gg[[3]],
          m1_gg[[4]], nrow = 1)

plot_grid(m1_gg[[5]],
          m1_gg[[6]], nrow = 1)

plot_grid(m1_gg[[7]],
          m1_gg[[8]], nrow = 1)

plot_grid(m1_gg[[9]],
          m1_gg[[10]], nrow = 1)

plot_grid(m1_gg[[11]],
          m1_gg[[12]], nrow = 1)

plot_grid(m1_gg[[13]],
          m1_gg[[14]], nrow = 1)

plot_grid(m1_gg[[15]],
          m1_gg[[16]], nrow = 1)

plot_grid(m1_gg[[17]],
          m1_gg[[18]], nrow = 1)

plot_grid(m1_gg[[19]],
          m1_gg[[20]], nrow = 1)

plot_grid(m1_gg[[21]],
          m1_gg[[22]], nrow = 1)

plot_grid(m1_gg[[23]],
          m1_gg[[24]], nrow = 1)

plot_grid(m1_gg[[25]])

5.1.2 by family

m1_gg <- list()
p <- length(lm_cols_x13)
pd <- position_dodge(0.4)
for(j in 1:p){
  greenhouse <- f2[treatment_tank != "No Flow f2" &
                     stream_id != "guan",
                   .(y_j = mean(get(lm_cols_x13[j]))),
                   by = .(stream_id, treatment, treatment_tank, family)]
  # model fit
  # form_j <- " y_j ~ stream_id * treatment_tank + (1|family)" %>%
  #   formula()
  # m1 <- lmer(form_j, data = greenhouse)
  # 
  form_j <- " y_j ~ stream_id + treatment_tank + (1|family)" %>%
    formula()
  m1 <- lmer(form_j, data = greenhouse)

  form_j <- " y_j ~ stream_id + treatment + (1|treatment_tank)" %>%
    formula()
  m1 <- lmer(form_j, data = greenhouse)

  form_j <- " y_j ~ stream_id + treatment_tank + (treatment_tank|family)" %>%
    formula()
  #m1 <- aov_4(form_j, data = greenhouse)


  gg <- ggplot(data = greenhouse,
               aes(x = treatment_tank,
                   y = y_j,
                   color = family,
                   group = family)) +
    
    # individual points
    geom_point(position = pd, size = 2, show.legend = FALSE) +
    
    ylab(lm_cols_x13[j]) +
    theme_pubr() +
#    scale_color_manual(values = pal_okabe_ito_blue) +
    scale_x_discrete(labels = label_wrap(10))
    NULL
  
  m1_gg[[length(m1_gg)+1]] <- ggplotGrob(gg)
}
names(m1_gg) <- lm_cols_x13

n_rows <- 2
plot_grid(m1_gg[[1]],
          m1_gg[[2]], nrow = n_rows)

plot_grid(m1_gg[[3]],
          m1_gg[[4]], nrow = n_rows)

plot_grid(m1_gg[[5]],
          m1_gg[[6]], nrow = n_rows)

plot_grid(m1_gg[[7]],
          m1_gg[[8]], nrow = n_rows)

plot_grid(m1_gg[[9]],
          m1_gg[[10]], nrow = n_rows)

plot_grid(m1_gg[[11]],
          m1_gg[[12]], nrow = n_rows)

plot_grid(m1_gg[[13]],
          m1_gg[[14]], nrow = n_rows)

plot_grid(m1_gg[[15]],
          m1_gg[[16]], nrow = n_rows)

plot_grid(m1_gg[[17]],
          m1_gg[[18]], nrow = n_rows)

plot_grid(m1_gg[[19]],
          m1_gg[[20]], nrow = n_rows)

plot_grid(m1_gg[[21]],
          m1_gg[[22]], nrow = n_rows)

plot_grid(m1_gg[[23]],
          m1_gg[[24]], nrow = n_rows)

plot_grid(m1_gg[[25]])

5.1.3 by family mean

m1_gg <- list()
p <- length(lm_cols_x13)
pd <- position_dodge(0.4)
for(j in 1:p){
  greenhouse <- f2[treatment_tank != "No Flow f2",
                 .(y_j = mean(get(lm_cols_x13[j]))),
                 by = .(stream_id, treatment_tank, family)]

  gg <- ggplot(data = greenhouse,
               aes(x = treatment_tank,
                   y = y_j,
                   color = stream_id,
                   group = family)) +
    
    # individual points
    geom_point(position = pd, size = 2) +
    
    geom_line(position = pd, alpha = 0.5) +
    
    ylab(lm_cols_x13[j]) +
    theme_pubr() +
    scale_color_manual(values = pal_okabe_ito_blue) +
    scale_x_discrete(labels = label_wrap(10))
    NULL
  
  m1_gg[[length(m1_gg)+1]] <- ggplotGrob(gg)
}
names(m1_gg) <- lm_cols_x13

plot_grid(m1_gg[[1]],
          m1_gg[[2]], nrow = 1)

plot_grid(m1_gg[[3]],
          m1_gg[[4]], nrow = 1)

plot_grid(m1_gg[[5]],
          m1_gg[[6]], nrow = 1)

plot_grid(m1_gg[[7]],
          m1_gg[[8]], nrow = 1)

plot_grid(m1_gg[[9]],
          m1_gg[[10]], nrow = 1)

plot_grid(m1_gg[[11]],
          m1_gg[[12]], nrow = 1)

plot_grid(m1_gg[[13]],
          m1_gg[[14]], nrow = 1)

plot_grid(m1_gg[[15]],
          m1_gg[[16]], nrow = 1)

plot_grid(m1_gg[[17]],
          m1_gg[[18]], nrow = 1)

plot_grid(m1_gg[[19]],
          m1_gg[[20]], nrow = 1)

plot_grid(m1_gg[[21]],
          m1_gg[[22]], nrow = 1)

plot_grid(m1_gg[[23]],
          m1_gg[[24]], nrow = 1)

plot_grid(m1_gg[[25]])

5.2 ANCOVA fits

  1. Model fits to each coordinate
# supress message "NOTE: A nesting structure was detected in the fitted model:
# treatment_tank %in% treatment"
m2_lm <- list()
m2_lm_emm <- list()
for(j in 1:length(lm_cols_x13)){
  # model fit
  form_j <- paste0(lm_cols_x13[j],
                   " ~ stream_id * treatment + treatment_tank + median_size") %>%
    formula()
  m2 <- lm(form_j, data = f2)

  # inference
  # emmean is expected mean of coordinate at the average median size across all fish
  m2_emm <- emmeans(m2,
                    specs = c("stream_id", "treatment", "treatment_tank"))
  
  # save to list
  m2_lm[[length(m2_lm)+1]] <- m2
  m2_lm_emm[[length(m2_lm_emm)+1]] <- summary(m2_emm)
}
names(m2_lm) <- lm_cols_x13
names(m2_lm_emm) <- lm_cols_x13
m2_gg <- list()
p <- length(lm_cols_x13)
pd <- position_dodge(0.4)
for(j in 1:p){
  
  m2 <- m2_lm[[j]]
  m2_emm <- m2_lm_emm[[j]]
  gg <- ggplot(data = f2,
               aes(x = median_size,
                   y = get(lm_cols_x13[j]),
                   color = stream_id)) +
    
    # individual points
    geom_point(alpha = 0.5) +
    

    ylab(lm_cols_x13[j]) +
    theme_pubr() +
    scale_color_manual(values = pal_okabe_ito_blue) +
#    scale_x_discrete(labels = label_wrap(10))
    NULL
  
  m2_gg[[length(m2_gg)+1]] <- ggplotGrob(gg)
}
names(m2_gg) <- lm_cols_x13

plot_grid(m2_gg[[1]],
          m2_gg[[2]], nrow = 1)

plot_grid(m2_gg[[3]],
          m2_gg[[4]], nrow = 1)

plot_grid(m2_gg[[5]],
          m2_gg[[6]], nrow = 1)

plot_grid(m2_gg[[7]],
          m2_gg[[8]], nrow = 1)

plot_grid(m2_gg[[9]],
          m2_gg[[10]], nrow = 1)

plot_grid(m2_gg[[11]],
          m2_gg[[12]], nrow = 1)

plot_grid(m2_gg[[13]],
          m2_gg[[14]], nrow = 1)

plot_grid(m2_gg[[15]],
          m2_gg[[16]], nrow = 1)

plot_grid(m2_gg[[17]],
          m2_gg[[18]], nrow = 1)

plot_grid(m2_gg[[19]],
          m2_gg[[20]], nrow = 1)

plot_grid(m2_gg[[21]],
          m2_gg[[22]], nrow = 1)

plot_grid(m2_gg[[23]],
          m2_gg[[24]], nrow = 1)

plot_grid(m2_gg[[25]])

m2_gg <- list()
p <- length(lm_cols_x13)
pd <- position_dodge(0.4)
for(j in 1:p){
  m2 <- m2_lm[[j]]
  m2_emm <- m2_lm_emm[[j]]
  gg <- ggplot(data = f2,
               aes(x = treatment_tank,
                   y = get(lm_cols_x13[j]),
                   color = stream_id)) +
    
    # individual points
    geom_point(position = pd, alpha = 0.5) +
    
    # group means
    geom_point(data = m2_emm,
               aes(x = treatment_tank,
                   y = emmean,
                   color = stream_id),
               size = 3,
               position = pd) +
    

    ylab(lm_cols_x13[j]) +
    theme_pubr() +
    scale_color_manual(values = pal_okabe_ito_blue) +
    scale_x_discrete(labels = label_wrap(10))
    NULL
  
  m2_gg[[length(m2_gg)+1]] <- ggplotGrob(gg)
}
names(m2_gg) <- lm_cols_x13

plot_grid(m2_gg[[1]],
          m2_gg[[2]], nrow = 1)

plot_grid(m2_gg[[3]],
          m2_gg[[4]], nrow = 1)

plot_grid(m2_gg[[5]],
          m2_gg[[6]], nrow = 1)

plot_grid(m2_gg[[7]],
          m2_gg[[8]], nrow = 1)

plot_grid(m2_gg[[9]],
          m2_gg[[10]], nrow = 1)

plot_grid(m2_gg[[11]],
          m2_gg[[12]], nrow = 1)

plot_grid(m2_gg[[13]],
          m2_gg[[14]], nrow = 1)

plot_grid(m2_gg[[15]],
          m2_gg[[16]], nrow = 1)

plot_grid(m2_gg[[17]],
          m2_gg[[18]], nrow = 1)

plot_grid(m2_gg[[19]],
          m2_gg[[20]], nrow = 1)

plot_grid(m2_gg[[21]],
          m2_gg[[22]], nrow = 1)

plot_grid(m2_gg[[23]],
          m2_gg[[24]], nrow = 1)

plot_grid(m2_gg[[25]])